Analysis of biomass

#stats to account for difference between treatments #Script: #1. Biomass second cut
#Data exploration
#assumptions test
#wilcox test for treatment #kruskal test for varieties #posthoc tests
#GLM
#boxcox
#2. Biomass first cut
#Data exploration
#assumptions test #wilcox test for treatment
#kruskal test for varieties
#posthoc tests

Data Exploration

summary(biomass$Cut_2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0040  0.5172  1.2165  1.6751  2.3182  9.3890
boxplot(biomass$Cut_2~biomass$Variety, ylab="Biomass [g]", main="Biomass of second cut")

boxplot(biomass$Cut_2~biomass$Treatment, ylab="Biomass [g]",main="Biomass of second cut")

hist(biomass$Cut_2, main="Biomass of second cut")

Normality tests for ttest

You can also embed plots, for example:

#t.test
#normality
# test for whole data
biomass_all=biomass$Cut_2
qqnorm(biomass_all)
qqline(biomass_all)

shapiro.test(biomass_all) #p value = <2.2*10^-16
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass_all
## W = 0.80181, p-value < 2.2e-16
#data is not normally distributed
#try to transform the dependent variable
#log transformation
biomass_log=log(biomass_all)
qqnorm(biomass_log)
qqline(biomass_log)

shapiro.test(biomass_log) #1.210^-9
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass_log
## W = 0.94679, p-value = 1.2e-09
#square root transformation
biomass_sqrt=sqrt(biomass_all)
qqnorm(biomass_sqrt)
qqline(biomass_sqrt)

shapiro.test(biomass_sqrt) #p value 6.59*10^-7
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass_sqrt
## W = 0.96705, p-value = 6.59e-07
#independence
#equality of variance
#as assumption of normality is not met use mann whitney test
wilcox.test(biomass$Cut_2 ~ biomass$Treatment) #p-value = 2.179e-14
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  biomass$Cut_2 by biomass$Treatment
## W = 7310, p-value = 2.179e-14
## alternative hypothesis: true location shift is not equal to 0
#do kurskal wallis test for Varieties
kruskal.test(biomass$Cut_2~biomass$Variety) #p-value = 2.056e-12
## 
##  Kruskal-Wallis rank sum test
## 
## data:  biomass$Cut_2 by biomass$Variety
## Kruskal-Wallis chi-squared = 83.865, df = 13, p-value = 2.056e-12
#Calculate pairwise multiple comparisons between group levels. 
#These tests are sometimes referred to as Nemenyi-tests for multiple 
#comparisons of (mean) rank sums of independent samples
#--> not appropriate for unequal samples

# posthoc.kruskal.nemenyi.test(x=biomass$Cut_2, g=biomass$Variety, dist="Chisquare")

Linear model

#linear model with interaction
sum.lm=summary(lm(formula=biomass$Cut_2~biomass$Treatment*biomass$Variety))
lm=lm(formula=biomass$Cut_2~biomass$Treatment*biomass$Variety)
plot(lm)

## hat values (leverages) are all = 0.08333333
##  and there are no factor predictors; no plot no. 5

#lm without interaction
summary(lm(formula=biomass$Cut_2~biomass$Treatment+biomass$Variety))
## 
## Call:
## lm(formula = biomass$Cut_2 ~ biomass$Treatment + biomass$Variety)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7466 -0.7401 -0.1296  0.5248  6.9977 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.69425    0.27567   2.518 0.012276 *  
## biomass$TreatmenteCO2 + 2°C    1.30508    0.14236   9.168  < 2e-16 ***
## biomass$VarietyAbergain        0.06967    0.37664   0.185 0.853371    
## biomass$VarietyAspect          0.26962    0.37664   0.716 0.474594    
## biomass$VarietyCarraig         2.13962    0.37664   5.681 3.01e-08 ***
## biomass$VarietyDunluce        -0.17004    0.37664  -0.451 0.651957    
## biomass$VarietyLilora          1.28329    0.37664   3.407 0.000740 ***
## biomass$VarietyMoy            -0.30142    0.37664  -0.800 0.424143    
## biomass$VarietySemi-natural11 -0.49279    0.37664  -1.308 0.191679    
## biomass$VarietySemi-natural6   0.96692    0.37664   2.567 0.010705 *  
## biomass$VarietySemi-natural7  -0.60625    0.37664  -1.610 0.108464    
## biomass$VarietySolomon         1.44342    0.37664   3.832 0.000153 ***
## biomass$VarietyWild4          -0.21463    0.37664  -0.570 0.569185    
## biomass$VarietyWild6           0.27700    0.37664   0.735 0.462604    
## biomass$VarietyWild7          -0.06742    0.37664  -0.179 0.858055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.305 on 321 degrees of freedom
## Multiple R-squared:  0.3912, Adjusted R-squared:  0.3646 
## F-statistic: 14.73 on 14 and 321 DF,  p-value: < 2.2e-16
lm.add=lm(formula=biomass$Cut_2~biomass$Treatment+biomass$Variety)
plot(lm)

## hat values (leverages) are all = 0.08333333
##  and there are no factor predictors; no plot no. 5

glm=glm(biomass$Cut_2~biomass$Treatment*biomass$Variety)
plot(glm)

## hat values (leverages) are all = 0.08333333
##  and there are no factor predictors; no plot no. 5

#try boxcox
lm.biom=lm(biomass$Cut_2~biomass$Variety*biomass$Treatment)
plot(lm.biom)

## hat values (leverages) are all = 0.08333333
##  and there are no factor predictors; no plot no. 5

bx.bio=boxcox(lm.biom)

lm.biom.bx=lm((biomass$Cut_2*0.4)~biomass$Variety*biomass$Treatment)
biomass2_log=log(biomass$Cut_2)
shapiro.test(biomass2_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass2_log
## W = 0.94679, p-value = 1.2e-09
qqnorm(biomass2_log)
qqline(biomass2_log)

#boxcox return 0.3 -> try log transformation

biomass2_test=biomass$Cut_2^0.5
shapiro.test(biomass2_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass2_test
## W = 0.96705, p-value = 6.59e-07
qqnorm(biomass2_test)
qqline(biomass2_test)

biomass2_rezi=biomass$Cut_2*-1
shapiro.test(biomass2_rezi)
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass2_rezi
## W = 0.80181, p-value < 2.2e-16

Analysis for the first cut

Data Exploration

summary(biomass$Cut_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00040 0.01675 0.04000 0.07429 0.10425 0.50600
boxplot(biomass$Cut_1~biomass$Variety, ylab="Biomass [g]", main="Biomass of first cut")

boxplot(biomass$Cut_1~biomass$Treatment, ylab="Biomass [g]",main="Biomass of first cut")

hist(biomass$Cut_1, main="Biomass of first cut")

Assumptions test and significance tests

#Assumptions test
#normality
# test for whole data
qqnorm(biomass$Cut_1)
qqline(biomass$Cut_1)

shapiro.test(biomass$Cut_1) #p value = <2.2*10^-16
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass$Cut_1
## W = 0.76989, p-value < 2.2e-16
#data is not normally distributed
#try to transform the dependent variable

#square root transformation
biomass_sqrt=sqrt(biomass$Cut_1)
qqnorm(biomass_sqrt)
qqline(biomass_sqrt)

shapiro.test(biomass_sqrt) #p value 4.1*10^-7
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass_sqrt
## W = 0.94589, p-value = 9.433e-10
#log doesn't work because of zeros in data
#reciproce transformation
biomass_rezi=biomass$Cut_1*(-1)
qqnorm(biomass_rezi)
qqline(biomass_rezi)

shapiro.test(biomass_rezi) #p value: 2.2*10^-16
## 
##  Shapiro-Wilk normality test
## 
## data:  biomass_rezi
## W = 0.76989, p-value < 2.2e-16
#boxcox transformation
lm.biom1=lm(biomass$Cut_1~biomass$Treatment)
plot(lm.biom1)

## hat values (leverages) are all = 0.005952381
##  and there are no factor predictors; no plot no. 5

boxcox(lm.biom1)

#independence
#equality of variance
#as assumption of normality is not met use wilcox test instead

wilcox.test(biomass$Cut_1~biomass$Treatment) #p-value = 9.145e-16
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  biomass$Cut_1 by biomass$Treatment
## W = 6956, p-value = 9.145e-16
## alternative hypothesis: true location shift is not equal to 0
#do kurskal wallis test for Varieties
kruskal.test(biomass$Cut_1~biomass$Variety) #
## 
##  Kruskal-Wallis rank sum test
## 
## data:  biomass$Cut_1 by biomass$Variety
## Kruskal-Wallis chi-squared = 92.63, df = 13, p-value = 4.38e-14
#Calculate pairwise multiple comparisons between group levels. 
#These tests are sometimes referred to as Nemenyi-tests for multiple 
#comparisons of (mean) rank sums of independent samples
#--> not appropriate for unequal samples

# posthoc.kruskal.nemenyi.test(x=biomass$Cut_1, g=biomass$Variety, dist="Chisquare")

GLM

#GLM
plot(glm(biomass$Cut_1~biomass$Variety*biomass$Treatment))

## hat values (leverages) are all = 0.08333333
##  and there are no factor predictors; no plot no. 5

summary(glm(biomass$Cut_1~biomass$Variety*biomass$Treatment))
## 
## Call:
## glm(formula = biomass$Cut_1 ~ biomass$Variety * biomass$Treatment)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.20075  -0.03316  -0.00825   0.02162   0.32167  
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.0459167  0.0188848
## biomass$VarietyAbergain                                   -0.0146667  0.0267072
## biomass$VarietyAspect                                      0.0004167  0.0267072
## biomass$VarietyCarraig                                     0.0543333  0.0267072
## biomass$VarietyDunluce                                    -0.0176833  0.0267072
## biomass$VarietyLilora                                      0.0131667  0.0267072
## biomass$VarietyMoy                                        -0.0341667  0.0267072
## biomass$VarietySemi-natural11                             -0.0317500  0.0267072
## biomass$VarietySemi-natural6                               0.0061667  0.0267072
## biomass$VarietySemi-natural7                              -0.0374167  0.0267072
## biomass$VarietySolomon                                     0.0185833  0.0267072
## biomass$VarietyWild4                                      -0.0104167  0.0267072
## biomass$VarietyWild6                                      -0.0215000  0.0267072
## biomass$VarietyWild7                                      -0.0183833  0.0267072
## biomass$TreatmenteCO2 + 2°C                                0.0550333  0.0267072
## biomass$VarietyAbergain:biomass$TreatmenteCO2 + 2°C       -0.0023667  0.0377696
## biomass$VarietyAspect:biomass$TreatmenteCO2 + 2°C         -0.0004500  0.0377696
## biomass$VarietyCarraig:biomass$TreatmenteCO2 + 2°C         0.1104667  0.0377696
## biomass$VarietyDunluce:biomass$TreatmenteCO2 + 2°C         0.0040667  0.0377696
## biomass$VarietyLilora:biomass$TreatmenteCO2 + 2°C          0.0422167  0.0377696
## biomass$VarietyMoy:biomass$TreatmenteCO2 + 2°C            -0.0126750  0.0377696
## biomass$VarietySemi-natural11:biomass$TreatmenteCO2 + 2°C -0.0148667  0.0377696
## biomass$VarietySemi-natural6:biomass$TreatmenteCO2 + 2°C   0.0474667  0.0377696
## biomass$VarietySemi-natural7:biomass$TreatmenteCO2 + 2°C  -0.0322000  0.0377696
## biomass$VarietySolomon:biomass$TreatmenteCO2 + 2°C         0.0688000  0.0377696
## biomass$VarietyWild4:biomass$TreatmenteCO2 + 2°C          -0.0129500  0.0377696
## biomass$VarietyWild6:biomass$TreatmenteCO2 + 2°C           0.0184667  0.0377696
## biomass$VarietyWild7:biomass$TreatmenteCO2 + 2°C          -0.0054000  0.0377696
##                                                           t value Pr(>|t|)   
## (Intercept)                                                 2.431   0.0156 * 
## biomass$VarietyAbergain                                    -0.549   0.5833   
## biomass$VarietyAspect                                       0.016   0.9876   
## biomass$VarietyCarraig                                      2.034   0.0428 * 
## biomass$VarietyDunluce                                     -0.662   0.5084   
## biomass$VarietyLilora                                       0.493   0.6224   
## biomass$VarietyMoy                                         -1.279   0.2018   
## biomass$VarietySemi-natural11                              -1.189   0.2354   
## biomass$VarietySemi-natural6                                0.231   0.8175   
## biomass$VarietySemi-natural7                               -1.401   0.1622   
## biomass$VarietySolomon                                      0.696   0.4871   
## biomass$VarietyWild4                                       -0.390   0.6968   
## biomass$VarietyWild6                                       -0.805   0.4214   
## biomass$VarietyWild7                                       -0.688   0.4918   
## biomass$TreatmenteCO2 + 2°C                                 2.061   0.0402 * 
## biomass$VarietyAbergain:biomass$TreatmenteCO2 + 2°C        -0.063   0.9501   
## biomass$VarietyAspect:biomass$TreatmenteCO2 + 2°C          -0.012   0.9905   
## biomass$VarietyCarraig:biomass$TreatmenteCO2 + 2°C          2.925   0.0037 **
## biomass$VarietyDunluce:biomass$TreatmenteCO2 + 2°C          0.108   0.9143   
## biomass$VarietyLilora:biomass$TreatmenteCO2 + 2°C           1.118   0.2645   
## biomass$VarietyMoy:biomass$TreatmenteCO2 + 2°C             -0.336   0.7374   
## biomass$VarietySemi-natural11:biomass$TreatmenteCO2 + 2°C  -0.394   0.6941   
## biomass$VarietySemi-natural6:biomass$TreatmenteCO2 + 2°C    1.257   0.2098   
## biomass$VarietySemi-natural7:biomass$TreatmenteCO2 + 2°C   -0.853   0.3946   
## biomass$VarietySolomon:biomass$TreatmenteCO2 + 2°C          1.822   0.0695 . 
## biomass$VarietyWild4:biomass$TreatmenteCO2 + 2°C           -0.343   0.7319   
## biomass$VarietyWild6:biomass$TreatmenteCO2 + 2°C            0.489   0.6252   
## biomass$VarietyWild7:biomass$TreatmenteCO2 + 2°C           -0.143   0.8864   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.004279637)
## 
##     Null deviance: 2.4322  on 335  degrees of freedom
## Residual deviance: 1.3181  on 308  degrees of freedom
## AIC: -850.22
## 
## Number of Fisher Scoring iterations: 2